{
library(dplyr)
library(ggplot2)
library(tidyr)
library(dplyr)
library(plotly)
}
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Switch to the project directory so the relative dataset path below resolves.
setwd("/home/nicole/Data Science/exam_big_data")
# Skip the first four lines of the file; the fifth line is then read as the
# header row (read.csv's default).
pop <- read.csv(file = "Datasets/total_population.csv", skip = 4)
# Build `w`: one row per remaining (per-year) column of the "World" record in
# `pop`, with the value in `world_pop`, the year in `year`, and the year label
# as the row name.
w <- local({
  # Keep the "World" record and drop the identifier columns plus `X`, so that
  # only the per-year columns remain.
  world <- pop %>%
    filter(Country.Name == "World") %>%
    select(-Country.Name, -Country.Code, -Indicator.Name, -Indicator.Code, -X)

  # Strip the first character of each remaining column name to get the year
  # label (presumably the "X" that read.csv prepends to names starting with a
  # digit -- confirm against the CSV header).
  year_labels <- substring(colnames(world), 2)

  # Read the values straight from the first matching row instead of injecting
  # the labels as a second data row and transposing: that detour coerced every
  # value to character before converting it back to numeric.
  data.frame(
    world_pop = as.numeric(unlist(world[1, ], use.names = FALSE)),
    year = as.numeric(year_labels),
    row.names = year_labels
  )
})
# Forecast grid: one row per year from 2018 through 2100 (83 rows).
newdata <- data.frame(year = seq(2018, 2100))

# linear model
# Fit world_pop as a straight line in year on the observed data in `w`, then
# extrapolate over the forecast grid.
mod <- lm(world_pop ~ year, data = w)
lmod <- predict(mod, newdata)

# Store the predictions as a plain numeric column. Wrapping them in
# data.frame() nested a data frame inside `newdata`, which then had to be
# unlisted before it could be used.
newdata$lm <- as.numeric(lmod)
#newdata <- newdata %>%
# select(lm, year)
## Logistic model!! :)
# Fit a self-starting logistic curve (SSlogis) to the observed series in `w`.
# Note that `mod` is reused: from here on it holds the nls fit.
mod <- nls(world_pop ~ SSlogis(year, phi1, phi2, phi3), data = w)
# summary(mod)

# Logistic predictions, one per row of the forecast grid.
pred <- predict(mod, newdata)
vv <- data.frame(pred)

# Use the whole prediction vector rather than a hard-coded pred[1:83], so the
# column always matches the number of forecast rows. as.numeric() also drops
# any attributes carried by the prediction.
newdata$world_pop <- as.numeric(pred)
# `lm` may arrive as a nested data frame or as a plain vector; flatten it
# either way.
newdata$lm <- as.numeric(unlist(newdata$lm))
newdata <- newdata %>%
  select(world_pop, year, lm)

# For the observed years the `lm` column simply repeats the observed values,
# which gives `w` the same columns as `newdata` so the two can be stacked.
w$lm <- w$world_pop
tot <- rbind(w, newdata)
# Interactive chart of `tot`: `world_pop` is drawn as the "Logistic model"
# trace and `lm` as the "Linear model" trace, both against `year`.
# The legend and axis labels had typos ("Lineaar", "qunatity"), fixed here.
p1 <- plot_ly(
  tot,
  x = ~year,
  y = ~world_pop,
  name = "Logistic model",
  type = "scatter",
  mode = "lines+markers"
) %>%
  add_trace(y = ~lm, name = "Linear model", mode = "lines+markers") %>%
  layout(
    title = "Predicted trend of world population growth until 2100",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Pop quantity ")
  )
p1